#import statements
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import linregress
# Reading in data: every later section works off this frame.
df = pd.read_csv("loan_data.csv")
# A bare expression is only echoed by a notebook; print the preview and the
# tables below so they also appear when this file runs as a plain script.
print(df)

# Breakdown of data: summary statistics, then row counts per category.
print(df.describe())

# (column, chart kind) in the order the breakdown is reported;
# None means "print the counts only, no chart".
breakdown_columns = (
    ('credit.policy', 'bar'),
    ('purpose', 'pie'),
    ('inq.last.6mths', None),
    ('delinq.2yrs', None),
    ('pub.rec', None),
    ('not.fully.paid', 'bar'),
)
for column, chart_kind in breakdown_columns:
    if chart_kind is not None:
        # Series.plot draws on the current axes when a figure is already open,
        # so give every chart its own figure and show it before the next one;
        # otherwise the bar and pie charts end up stacked on the same axes.
        plt.figure()
        df[column].value_counts().plot(kind=chart_kind)
        plt.show()
    print(df.groupby(column).size())
# Pairwise scatter plots over the whole data set.
# Each row is (x column, y column, fit a regression line?), listed in the
# order the charts are produced. Rows flagged True also get a least-squares
# line and have their slope and intercept printed.
pairwise_charts = (
    # interest rate by ...
    ('int.rate', 'installment', True),
    ('int.rate', 'log.annual.inc', False),
    ('int.rate', 'dti', False),
    ('int.rate', 'fico', True),
    ('int.rate', 'days.with.cr.line', True),
    ('int.rate', 'revol.bal', False),
    ('int.rate', 'revol.util', False),
    ('int.rate', 'inq.last.6mths', False),
    # installment by ...
    ('installment', 'log.annual.inc', True),
    ('installment', 'dti', False),
    ('installment', 'fico', False),
    ('installment', 'days.with.cr.line', False),
    ('installment', 'revol.bal', False),
    ('installment', 'revol.util', False),
    ('installment', 'inq.last.6mths', False),
    # annual income (log) by ...
    ('log.annual.inc', 'dti', False),
    ('log.annual.inc', 'fico', False),
    ('log.annual.inc', 'days.with.cr.line', False),
    ('log.annual.inc', 'revol.bal', True),
    ('log.annual.inc', 'revol.util', False),
    ('log.annual.inc', 'inq.last.6mths', False),
    # debt to income by ...
    ('dti', 'fico', False),
    ('dti', 'days.with.cr.line', False),
    ('dti', 'revol.bal', False),
    ('dti', 'revol.util', False),
    ('dti', 'inq.last.6mths', False),
    # fico by ...
    ('fico', 'days.with.cr.line', True),
    ('fico', 'revol.bal', False),
    ('fico', 'revol.util', True),
    ('fico', 'inq.last.6mths', True),
    # days with credit line by ...
    ('days.with.cr.line', 'revol.bal', False),
    ('days.with.cr.line', 'revol.util', False),
    ('days.with.cr.line', 'inq.last.6mths', True),
    # revolving balance by ...
    ('revol.bal', 'revol.util', False),
    ('revol.bal', 'inq.last.6mths', False),
    # utilization rate by ...
    ('revol.util', 'inq.last.6mths', False),
)

for x_col, y_col, with_fit in pairwise_charts:
    # Start every chart on a fresh figure so it can never be drawn onto axes
    # left over from an earlier plot that was created but not shown yet.
    plt.figure()
    if not with_fit:
        plt.scatter(df[x_col], df[y_col])
        plt.show()
        continue
    x = df[x_col]
    y = df[y_col]
    # The points are drawn once; a second, uncoloured scatter call would only
    # repaint them in the default colour on top of the requested blue.
    plt.scatter(x, y, color="blue")
    stats = linregress(x, y)
    m = stats.slope
    b = stats.intercept
    plt.plot(x, m * x + b, color="red")
    plt.show()
    print("Slope is: " + str(m))
    print("Intercept is: " + str(b))
# Split the loans by the credit.policy flag (1 vs 0).
hasCreditPolicy = df['credit.policy'] == 1
noCreditPolicy = df['credit.policy'] == 0
# reset_index() renumbers each subset from 0. With its default arguments the
# previous row labels are kept as an extra column, so that column also shows
# up in the describe() tables below.
df_hasCreditPolicy = df[hasCreditPolicy].reset_index()
df_noCreditPolicy = df[noCreditPolicy].reset_index()

# Summary metrics for each subset. They are printed explicitly because a bare
# describe() expression is discarded when the file runs as a script.
# Loans with credit.policy == 1:
print(df_hasCreditPolicy.describe())
# Loans with credit.policy == 0:
print(df_noCreditPolicy.describe())
# Pairwise charts drawn separately for loans with and without credit policy.
# Each row is (x column, y column, fit for credit.policy == 1?,
# fit for credit.policy == 0?). Within a row the with-policy chart is drawn
# first, then the no-policy chart.
policy_charts = (
    # interest rate by ...
    ('int.rate', 'installment', True, True),
    ('int.rate', 'log.annual.inc', False, False),
    ('int.rate', 'dti', False, True),
    ('int.rate', 'fico', True, True),
    ('int.rate', 'days.with.cr.line', True, False),
    ('int.rate', 'revol.bal', False, False),
    ('int.rate', 'revol.util', True, True),
    ('int.rate', 'inq.last.6mths', False, False),
    # installment by ...
    ('installment', 'log.annual.inc', True, True),
    ('installment', 'dti', False, False),
    ('installment', 'fico', False, False),
    ('installment', 'days.with.cr.line', False, False),
    ('installment', 'revol.bal', False, False),
    ('installment', 'revol.util', False, False),
    ('installment', 'inq.last.6mths', False, False),
    # annual income (log) by ...
    ('log.annual.inc', 'dti', False, False),
    ('log.annual.inc', 'fico', False, False),
    ('log.annual.inc', 'days.with.cr.line', True, True),
    ('log.annual.inc', 'revol.bal', True, True),
    ('log.annual.inc', 'revol.util', False, False),
    ('log.annual.inc', 'inq.last.6mths', False, False),
    # debt to income by ...
    ('dti', 'fico', False, False),
    ('dti', 'days.with.cr.line', False, False),
    ('dti', 'revol.bal', True, True),
    ('dti', 'revol.util', False, False),
    ('dti', 'inq.last.6mths', False, False),
    # fico by ...
    ('fico', 'days.with.cr.line', True, True),
    ('fico', 'revol.bal', True, True),
    ('fico', 'revol.util', True, True),
    ('fico', 'inq.last.6mths', False, False),
)

for x_col, y_col, fit_with_policy, fit_without_policy in policy_charts:
    for frame, wants_fit in (
        (df_hasCreditPolicy, fit_with_policy),
        (df_noCreditPolicy, fit_without_policy),
    ):
        meets_policy = frame is df_hasCreditPolicy
        if not wants_fit:
            # Plain marker chart: circles for with-policy loans, red crosses
            # for no-policy loans. It is left pending and appears at the next
            # plt.show().
            frame.plot(x=x_col, y=y_col, style='o' if meets_policy else 'rx')
            continue

        x = frame[x_col]
        y = frame[y_col]
        if meets_policy:
            # plt.scatter draws on the current axes, so open a new figure
            # first; otherwise the points land on the last DataFrame.plot
            # chart that has not been shown yet. The points are drawn once,
            # in the requested blue.
            plt.figure()
            plt.scatter(x, y, color="blue")
            line_color = "red"
        else:
            frame.plot(x=x_col, y=y_col, style='rx')
            line_color = "blue"

        # Least-squares line over the chart that was just drawn.
        stats = linregress(x, y)
        m = stats.slope
        b = stats.intercept
        plt.plot(x, m * x + b, color=line_color)
        plt.show()
        print("Slope is: " + str(m))
        print("Intercept is: " + str(b))
# Loan purpose for each credit-policy subset (with policy first, then without).
for frame in (df_hasCreditPolicy, df_noCreditPolicy):
    # Series.plot reuses the current axes when a figure is open, so each pie
    # gets its own figure and is shown before the next chart is drawn.
    plt.figure()
    frame['purpose'].value_counts().plot(kind='pie')
    plt.show()
    # Printed explicitly: a bare expression is discarded outside a notebook.
    print(frame.groupby('purpose').size())

# Repayment status for each credit-policy subset.
for frame in (df_hasCreditPolicy, df_noCreditPolicy):
    plt.figure()
    frame['not.fully.paid'].value_counts().plot(kind='pie')
    plt.show()
    # Counts come from the same subset as the pie chart (the no-policy
    # section used to print the with-policy counts).
    print(frame.groupby('not.fully.paid').size())
    print()
    # Share of rows with not.fully.paid == 0, computed from the data instead
    # of hard-coded counts so it stays correct if the CSV changes.
    paid_share = float((frame['not.fully.paid'] == 0).mean())
    print("Percent who have fully paid: " + str(paid_share))
# Split the loans by purpose: one boolean mask and one subset per category.
_purpose = df['purpose']
allOther = _purpose.eq('all_other')
creditCard = _purpose.eq('credit_card')
debtConsol = _purpose.eq('debt_consolidation')
education = _purpose.eq('educational')
homeImprov = _purpose.eq('home_improvement')
majorPurch = _purpose.eq('major_purchase')
smallBus = _purpose.eq('small_business')

# Select each subset and renumber its rows from 0 in one step.
df_allOther = df.loc[allOther].reset_index()
df_creditCard = df.loc[creditCard].reset_index()
df_debtConsol = df.loc[debtConsol].reset_index()
df_education = df.loc[education].reset_index()
df_homeImprov = df.loc[homeImprov].reset_index()
df_majorPurch = df.loc[majorPurch].reset_index()
df_smallBus = df.loc[smallBus].reset_index()
# Per-purpose summaries: the same report is produced for every purpose subset.
for frame in (
    df_allOther,     # ALL OTHER
    df_creditCard,   # CREDIT CARD
    df_debtConsol,   # DEBT CONSOLIDATION
    df_education,    # EDUCATION
    df_homeImprov,   # HOME IMPROVEMENT
    df_majorPurch,   # MAJOR PURCHASE
    df_smallBus,     # SMALL BUSINESS
):
    # Printed explicitly: a bare describe() is discarded outside a notebook.
    print(frame.describe())

    # Credit policy within this purpose. Series.plot reuses the current axes
    # when a figure is open, so each pie gets its own figure and is shown
    # before the next chart is drawn.
    plt.figure()
    frame['credit.policy'].value_counts().plot(kind='pie')
    plt.show()
    print(frame.groupby('credit.policy').size())
    print()
    # Share of rows with credit.policy == 0, computed from the subset itself.
    # The hard-coded counts were copied between sections, so the credit-card
    # report showed the "all other" figure.
    no_policy_share = float((frame['credit.policy'] == 0).mean())
    print("Percent who do not have a credit policy: " + str(no_policy_share))

    # Repayment status within this purpose.
    plt.figure()
    frame['not.fully.paid'].value_counts().plot(kind='pie')
    plt.show()
    print(frame.groupby('not.fully.paid').size())
    print()
    # Share of rows with not.fully.paid == 0.
    paid_share = float((frame['not.fully.paid'] == 0).mean())
    print("Percent who have fully paid: " + str(paid_share))
# Split the loans by repayment status (the not.fully.paid flag: 1 vs 0).
_unpaid_flag = df['not.fully.paid']
notFullyPaid = _unpaid_flag.eq(1)
fullyPaid = _unpaid_flag.eq(0)

# Select each subset and renumber its rows from 0 in one step.
df_notFullyPaid = df.loc[notFullyPaid].reset_index()
df_fullyPaid = df.loc[fullyPaid].reset_index()
# Summaries by repayment status: NOT FULLY PAID first, then FULLY PAID.
for frame in (df_notFullyPaid, df_fullyPaid):
    # Printed explicitly: bare expressions are discarded outside a notebook.
    print(frame.describe())

    # Loan purpose within this repayment group. Series.plot reuses the
    # current axes when a figure is open, so each pie gets its own figure and
    # is shown before the next chart is drawn.
    plt.figure()
    frame['purpose'].value_counts().plot(kind='pie')
    plt.show()
    print(frame.groupby('purpose').size())

    # Credit policy within this repayment group.
    plt.figure()
    frame['credit.policy'].value_counts().plot(kind='pie')
    plt.show()
    print(frame.groupby('credit.policy').size())
    print()
    # Share of rows with credit.policy == 0, computed from the data instead
    # of hard-coded counts so it stays correct if the CSV changes.
    no_policy_share = float((frame['credit.policy'] == 0).mean())
    print("Percent who do not have a credit policy: " + str(no_policy_share))
#interest rate by installment
#not fully paid
x = df_notFullyPaid['int.rate']
y = df_notFullyPaid['installment']
plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')
plt.title('Correlating interest rates and monthly installment payments for not fully paid loans')
plt.xlabel('Interest Rate')
plt.ylabel('Monthly Installment Payments')
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
#fully paid
x = df_fullyPaid['int.rate']
y = df_fullyPaid['installment']
df_fullyPaid.plot.scatter(x='int.rate', y='installment', color='#32BDD1')
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.plot(x, m * x + b, color='#3D77D6')
plt.title('Correlating interest rates and monthly installment payments for fully paid loans')
plt.xlabel('Interest Rate')
plt.ylabel('Monthly Installment Payments')
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
#interest rate by annual income
df_notFullyPaid.plot(x='int.rate', y='log.annual.inc', style='o')
df_fullyPaid.plot(x='int.rate', y='log.annual.inc', style='rx')
#interest rate by debt to income
#not fully paid
x = df_notFullyPaid['int.rate']
y = df_notFullyPaid['dti']
plt.scatter(x, y, color="blue")
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
#fully paid
df_fullyPaid.plot(x='int.rate', y='dti', style='rx')
#interest rate by fico
#not fully paid
x = df_notFullyPaid['int.rate']
y = df_notFullyPaid['fico']
plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')
plt.title('Correlating interest rates and fico scores for not fully paid loans')
plt.xlabel('Interest Rate')
plt.ylabel('FICO Score')
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
#fully paid
x = df_fullyPaid['int.rate']
y = df_fullyPaid['fico']
df_fullyPaid.plot.scatter(x='int.rate', y='fico', color='#32BDD1')
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.plot(x, m * x + b, color='#3D77D6')
plt.title('Correlating interest rates and fico scores for fully paid loans')
plt.xlabel('Interest Rate')
plt.ylabel('FICO Score')
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
#interest rate by days with credit
#not fully paid
df_notFullyPaid.plot(x='int.rate', y='days.with.cr.line', style='o')
#fully paid
x = df_fullyPaid['int.rate']
y = df_fullyPaid['days.with.cr.line']
df_fullyPaid.plot(x='int.rate', y='days.with.cr.line', style='rx')
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.plot(x, m * x + b, color="blue")
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
#interest rate by revolving balance
df_notFullyPaid.plot(x='int.rate', y='revol.bal', style='o')
df_fullyPaid.plot(x='int.rate', y='revol.bal', style='rx')
#interest rate by utilization rate
#not fully paid
x = df_notFullyPaid['int.rate']
y = df_notFullyPaid['revol.util']
plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
#fully paid
x = df_fullyPaid['int.rate']
y = df_fullyPaid['revol.util']
df_fullyPaid.plot.scatter(x='int.rate', y='revol.util', color='#32BDD1')
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.plot(x, m * x + b, color='#3D77D6')
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
#interest rate by inquisition by last 6 months
df_notFullyPaid.plot(x='int.rate', y='inq.last.6mths', style='o')
df_fullyPaid.plot(x='int.rate', y='inq.last.6mths', style='rx')
#installment by annual income
#not fully paid
x = df_notFullyPaid['installment']
y = df_notFullyPaid['log.annual.inc']
plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')
plt.title('Correlating monthly installment payments and annual income for not fully paid loans')
plt.xlabel('Monthly Installment Payments')
plt.ylabel('Annual Income')
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
#fully paid
x = df_fullyPaid['installment']
y = df_fullyPaid['log.annual.inc']
df_fullyPaid.plot.scatter(x='installment', y='log.annual.inc', color='#32BDD1')
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.plot(x, m * x + b, color='#3D77D6')
plt.title('Correlating monthly installment payments and annual income for fully paid loans')
plt.xlabel('Monthly Installment Payments')
plt.ylabel('Annual Income')
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
#installment by debt to income
df_notFullyPaid.plot(x='installment', y='dti', style='o')
df_fullyPaid.plot(x='installment', y='dti', style='rx')
#installment by fico
df_notFullyPaid.plot(x='installment', y='fico', style='o')
df_fullyPaid.plot(x='installment', y='fico', style='rx')
#installment by days w/ credit
df_notFullyPaid.plot(x='installment', y='days.with.cr.line', style='o')
df_fullyPaid.plot(x='installment', y='days.with.cr.line', style='rx')
#installment by revolving balance
df_notFullyPaid.plot(x='installment', y='revol.bal', style='o')
df_fullyPaid.plot(x='installment', y='revol.bal', style='rx')
#installment by revolving balance
df_notFullyPaid.plot(x='installment', y='revol.util', style='o')
df_fullyPaid.plot(x='installment', y='revol.util', style='rx')
#installment by inquisitions in last 6 months
df_notFullyPaid.plot(x='installment', y='inq.last.6mths', style='o')
df_fullyPaid.plot(x='installment', y='inq.last.6mths', style='rx')
#annual income by debt to income
df_notFullyPaid.plot(x='log.annual.inc', y='dti', style='o')
df_fullyPaid.plot(x='log.annual.inc', y='dti', style='rx')
#annual income by debt to income
df_notFullyPaid.plot(x='log.annual.inc', y='fico', style='o')
df_fullyPaid.plot(x='log.annual.inc', y='fico', style='rx')
#annual income by days w/ credit
#not fully paid
x = df_notFullyPaid['log.annual.inc']
y = df_notFullyPaid['days.with.cr.line']
plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')
plt.title('Correlating annual income and days within credit line for not fully paid loans')
plt.xlabel('Annual Income')
plt.ylabel('Days Within Credit Line')
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
#fully paid
x = df_fullyPaid['log.annual.inc']
y = df_fullyPaid['days.with.cr.line']
df_fullyPaid.plot.scatter(x='log.annual.inc', y='days.with.cr.line', color='#32BDD1')
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.plot(x, m * x + b, color='#3D77D6')
plt.title('Correlating annual income and days within credit line for fully paid loans')
plt.xlabel('Annual Income')
plt.ylabel('Days Within Credit Line')
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
#annual income by revolving balance
#not fully paid
x = df_notFullyPaid['log.annual.inc']
y = df_notFullyPaid['revol.bal']
plt.scatter(x, y, color='#3D77D6')
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.scatter(x, y)
plt.plot(x, m * x + b, color='#32BDD1')
plt.title('Correlating annual income and revolving balance for not fully paid loans')
plt.xlabel('Annual Income')
plt.ylabel('Revolving Balance')
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
#fully paid
x = df_fullyPaid['log.annual.inc']
y = df_fullyPaid['revol.bal']
df_fullyPaid.plot.scatter(x='log.annual.inc', y='revol.bal', color='#32BDD1')
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.plot(x, m * x + b, color='#3D77D6')
plt.title('Correlating annual income and revolving balance for fully paid loans')
plt.xlabel('Annual Income')
plt.ylabel('Revolving Balance')
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
#annual income by utilization rate
df_notFullyPaid.plot(x='log.annual.inc', y='revol.util', style='o')
df_fullyPaid.plot(x='log.annual.inc', y='revol.util', style='rx')
#annual income by inquisitions in last 6 months
df_notFullyPaid.plot(x='log.annual.inc', y='inq.last.6mths', style='o')
df_fullyPaid.plot(x='log.annual.inc', y='inq.last.6mths', style='rx')
#debt to income by fico
#not fully paid
x = df_notFullyPaid['dti']
y = df_notFullyPaid['fico']
plt.scatter(x, y, color="blue")
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.scatter(x, y)
plt.plot(x, m * x + b, color="red")
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
#fully paid
x = df_fullyPaid['dti']
y = df_fullyPaid['fico']
df_fullyPaid.plot(x='dti', y='fico', style='rx')
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.plot(x, m * x + b, color="blue")
plt.show()
print("Slope is: " + str(m))
print("Intercept is: " + str(b))
# Quick-look plots of 'dti' against four other columns, in this order:
#   debt to income by days w/ credit
#   debt to income by revolving balance
#   debt to income by utilization rate
#   debt to income by inquiries in last 6 months
# Each column gets two figures: circles for the not-fully-paid frame, then
# red x markers for the fully-paid frame.
for _y_column in ('days.with.cr.line', 'revol.bal', 'revol.util', 'inq.last.6mths'):
    df_notFullyPaid.plot(x='dti', y=_y_column, style='o')
    df_fullyPaid.plot(x='dti', y=_y_column, style='rx')
#fico by days w/ credit
#not fully paid
# Scatter of 'fico' against 'days.with.cr.line' for the not-fully-paid subset,
# overlaid with the least-squares line fitted by scipy.stats.linregress.
x = df_notFullyPaid['fico']
y = df_notFullyPaid['days.with.cr.line']
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
# Open a fresh figure: pyplot draws on the current axes, so without this the
# points would land on whatever figure earlier plotting calls left open.
plt.figure()
# Draw the points exactly once, in the palette colour. A second, uncoloured
# plt.scatter(x, y) used to paint over them in the default colour.
plt.scatter(x, y, color='#3D77D6')
plt.plot(x, m * x + b, color='#32BDD1')
plt.title('Correlating FICO scores and days within credit line for not fully paid loans')
plt.xlabel('FICO Score')
plt.ylabel('Days Within Credit Line')
plt.show()
print(f"Slope is: {m!s}")
print(f"Intercept is: {b!s}")
#fully paid
# 'fico' against 'days.with.cr.line' for the fully paid subset: pandas scatter
# plus the linregress least-squares line on the same figure.
x, y = df_fullyPaid['fico'], df_fullyPaid['days.with.cr.line']
stats = linregress(x, y)
m, b = stats.slope, stats.intercept
df_fullyPaid.plot.scatter(x='fico', y='days.with.cr.line', color='#32BDD1')
plt.plot(x, m * x + b, color='#3D77D6')
plt.xlabel('FICO Score')
plt.ylabel('Days Within Credit Line')
plt.title('Correlating FICO scores and days within credit line for fully paid loans')
plt.show()
print(f"Slope is: {m!s}")
print(f"Intercept is: {b!s}")
#fico by revolving balance
# Same axes for both subsets; only the marker style differs
# (circles vs. red x markers).
_fico_vs_balance = {'x': 'fico', 'y': 'revol.bal'}
df_notFullyPaid.plot(style='o', **_fico_vs_balance)
df_fullyPaid.plot(style='rx', **_fico_vs_balance)
#fico by utilization rate
#not fully paid
# Scatter of 'fico' against 'revol.util' for the not-fully-paid subset,
# overlaid with the least-squares line fitted by scipy.stats.linregress.
x = df_notFullyPaid['fico']
y = df_notFullyPaid['revol.util']
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
# Open a fresh figure: pyplot draws on the current axes, so without this the
# points would land on whatever figure earlier plotting calls left open.
plt.figure()
# Draw the points exactly once, in the palette colour. A second, uncoloured
# plt.scatter(x, y) used to paint over them in the default colour.
plt.scatter(x, y, color='#3D77D6')
plt.plot(x, m * x + b, color='#32BDD1')
plt.title('Correlating FICO scores and utilization rate for not fully paid loans')
plt.xlabel('FICO Score')
plt.ylabel('Utilization Rate')
plt.show()
print(f"Slope is: {m!s}")
print(f"Intercept is: {b!s}")
#fully paid
# 'fico' against 'revol.util' for the fully paid subset: pandas scatter plus
# the linregress least-squares line on the same figure.
x, y = df_fullyPaid['fico'], df_fullyPaid['revol.util']
stats = linregress(x, y)
m, b = stats.slope, stats.intercept
df_fullyPaid.plot.scatter(x='fico', y='revol.util', color='#32BDD1')
plt.plot(x, m * x + b, color='#3D77D6')
plt.xlabel('FICO Score')
plt.ylabel('Utilization Rate')
plt.title('Correlating FICO scores and utilization rate for fully paid loans')
plt.show()
print(f"Slope is: {m!s}")
print(f"Intercept is: {b!s}")
#fico by days w/ credit
#not fully paid
# NOTE(review): this repeats the 'fico' vs 'days.with.cr.line' regression that
# already appears earlier in the file for the same subset; kept so the output
# sequence is unchanged, but it is a candidate for removal.
x = df_notFullyPaid['fico']
y = df_notFullyPaid['days.with.cr.line']
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
# Fresh figure so the points never land on a figure left open by earlier calls.
plt.figure()
# Draw the points exactly once, in the palette colour. A second, uncoloured
# plt.scatter(x, y) used to paint over them in the default colour.
plt.scatter(x, y, color='#3D77D6')
plt.plot(x, m * x + b, color='#32BDD1')
# Title and axis labels match the earlier copy of this figure, which was the
# only one of the two that carried them.
plt.title('Correlating FICO scores and days within credit line for not fully paid loans')
plt.xlabel('FICO Score')
plt.ylabel('Days Within Credit Line')
plt.show()
print(f"Slope is: {m!s}")
print(f"Intercept is: {b!s}")
#fully paid
# NOTE(review): this repeats the fully-paid 'fico' vs 'days.with.cr.line'
# regression that already appears earlier in the file; kept so the output
# sequence is unchanged, but it is a candidate for removal.
x = df_fullyPaid['fico']
y = df_fullyPaid['days.with.cr.line']
df_fullyPaid.plot.scatter(x='fico', y='days.with.cr.line', color='#32BDD1')
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
plt.plot(x, m * x + b, color='#3D77D6')
# Title and axis labels match the earlier copy of this figure, which was the
# only one of the two that carried them.
plt.title('Correlating FICO scores and days within credit line for fully paid loans')
plt.xlabel('FICO Score')
plt.ylabel('Days Within Credit Line')
plt.show()
print(f"Slope is: {m!s}")
print(f"Intercept is: {b!s}")
#fico by inquiries in last 6 months
#not fully paid
# Scatter of 'fico' against 'inq.last.6mths' for the not-fully-paid subset,
# overlaid with the least-squares line fitted by scipy.stats.linregress.
x = df_notFullyPaid['fico']
y = df_notFullyPaid['inq.last.6mths']
stats = linregress(x, y)
m = stats.slope
b = stats.intercept
# Fresh figure so the points never land on a figure left open by earlier calls.
plt.figure()
# Draw the points exactly once, in the palette colour. A second, uncoloured
# plt.scatter(x, y) used to paint over them in the default colour.
plt.scatter(x, y, color='#3D77D6')
plt.plot(x, m * x + b, color='#32BDD1')
plt.title('Correlating FICO scores and number of inquiries in the past 6 months for not fully paid loans')
plt.xlabel('FICO Score')
plt.ylabel('Number of Inquiries')
plt.show()
print(f"Slope is: {m!s}")
print(f"Intercept is: {b!s}")
#fully paid
# 'fico' against 'inq.last.6mths' for the fully paid subset: pandas scatter
# plus the linregress least-squares line on the same figure.
x, y = df_fullyPaid['fico'], df_fullyPaid['inq.last.6mths']
stats = linregress(x, y)
m, b = stats.slope, stats.intercept
df_fullyPaid.plot.scatter(x='fico', y='inq.last.6mths', color='#32BDD1')
plt.plot(x, m * x + b, color='#3D77D6')
plt.xlabel('FICO Score')
plt.ylabel('Number of Inquiries')
plt.title('Correlating FICO scores and number of inquiries in the past 6 months for fully paid loans')
plt.show()
print(f"Slope is: {m!s}")
print(f"Intercept is: {b!s}")
# Quick-look scatter pairs, in this order:
#   revolving balance by utilization rate
#   revolving balance by inquiries in past 6 months
#   utilization rate by inquiries in past 6 months
# Each pair is drawn for the not-fully-paid frame first, then the fully-paid
# frame, each in its own palette colour.
for _x_column, _y_column in (
    ('revol.bal', 'revol.util'),
    ('revol.bal', 'inq.last.6mths'),
    ('revol.util', 'inq.last.6mths'),
):
    df_notFullyPaid.plot.scatter(x=_x_column, y=_y_column, color='#3D77D6')
    df_fullyPaid.plot.scatter(x=_x_column, y=_y_column, color='#32BDD1')
# Distribution histograms: one figure per (column, subset) pair, with the
# not-fully-paid subset drawn before the fully-paid subset for every column.
def _plot_distribution(values, title, xlabel, color):
    """Draw a 20-bin histogram of *values* on its own figure and show it.

    Args:
        values: pandas Series to histogram.
        title: Figure title.
        xlabel: Label for the x axis.
        color: Bar colour passed through to ``Series.plot.hist``.

    ``Series.plot`` draws on the current axes when a figure is already open,
    so a new figure is created first. Without it, consecutive histograms pile
    up on one axes and each title overwrites the previous one.
    """
    plt.figure()
    values.plot.hist(grid=False, bins=20, rwidth=0.9, color=color)
    plt.title(title)
    plt.xlabel(xlabel)
    # Horizontal guide lines only; the vertical grid is disabled above.
    plt.grid(axis='y', alpha=0.75)
    plt.show()


def _plot_all_distributions():
    """Plot every column's histogram for both loan subsets, in file order."""
    # (column, phrase used in the title, x-axis label)
    # NOTE(review): 'log.annual.inc' is labelled 'Annual Income' although the
    # column name suggests log-transformed values - confirm the intended label.
    columns = (
        ('int.rate', 'interest rates', 'Interest Rate'),
        ('installment', 'monthly installments', 'Monthly Installment Amount'),
        ('log.annual.inc', 'annual income', 'Annual Income'),
        ('dti', 'debt to income ratio', 'Debt to Income Ratio'),
        ('fico', 'FICO scores', 'FICO Score'),
        ('revol.bal', 'revolving balances', 'Revolving Balance'),
        ('revol.util', 'utilization rates', 'Utilization Rate'),
        ('inq.last.6mths', 'inquiries within the last six months', 'Number of Inquiries'),
        ('delinq.2yrs', 'delinquencies in last two years', 'Number of Delinquencies'),
        ('pub.rec', 'derogatory public records', 'Number of Derogatory Public Records'),
    )
    # (frame, phrase used in the title, bar colour)
    subsets = (
        (df_notFullyPaid, 'not fully paid', '#3D77D6'),
        (df_fullyPaid, 'fully paid', '#32BDD1'),
    )
    for column, subject, xlabel in columns:
        for frame, status, color in subsets:
            _plot_distribution(
                frame[column],
                f'Distribution of {subject} for {status} loans',
                xlabel,
                color,
            )


_plot_all_distributions()